---
title: "Replication materials"
author: "Ravi Bhavnani, Nina Schlager, Mirko Reul and Karsten Donnay"
subtitle: Household trajectories of resilience to acute malnutrition in the Kenyan
  drylands
output:
  pdf_document: 
    fig_caption: yes
editor_options:
  markdown:
    wrap: 72
  chunk_output_type: inline
---

```{r knitr_options, echo=FALSE}
# set initial knitr options
knitr::opts_chunk$set(eval = TRUE, echo = FALSE,
                      fig.align = "center",
                      fig.asp = 0.7,
                      dpi = 300,
                      out.width = "80%",
                      fig.pos = "!htp",
                      out.extra = "")
```

This file contains replication information for the analysis performed in
Bhavnani et al. (2023). *'Household trajectories of resilience to acute
malnutrition in the Kenyan drylands'*, Frontiers in Sustainable Food
Systems, DOI:
[10.3389/fsufs.2023.1091346](https://doi.org/10.3389/fsufs.2023.1091346)

There are 3 distinct steps in the analysis:

1.  Data preparation and resilience calculation

    > -- Geo-coding of wards
    >
    > -- Ward resilience to acute malnutrition
    >
    > -- External stressor intensity

2.  Latent Class Mixed Model (LCMM)

3.  Cox Regression Analysis

Please also see `readme.txt` for additional information on the data
inputs and variable coding. Note that most illustrations featured in the
published paper have been re-processed in *Inkscape* to maximize
clarity.

```{r setup, echo=FALSE, message=FALSE}
knitr::opts_chunk$set(include = FALSE)

# define colors
BLUES   <- c("#47affc","#2186f4","#4779ea","#567ecc")
REDS   <- c("#ffb3de" ,"#ff6699", "#ff6161","#de2414")
GREENS <- c("#ccff66","#78e027","#12cc6a","#00917c")
WHITE  <- "#eef2e3"
IHEID.GRAY <-  "#5c666f"

# get working directory
wd = getwd()

# load helper functions and packages
source("code/helper_packages_load.R")
source("code/helper_functions.R")

# load packages
packages_load(install = FALSE)
```

# 1. Data preparation and resilience calculation

## 1.1 Geo-coding of wards

The raw NDMA data used is `MUAC_all.csv`. All observations are geocoded
at the ward level (adm3). We therefore compare ward names provided by
the NDMA with spatial information from [the GADM
project](https://gadm.org). `geocoding_wardnames_all.txt` lists all
differences. The adjusted spatial information is stored in
`KEN_adm3-ndma.shp` and can be found in the sub-directory
`gadm36_KEN_shp`. `helper_functions_geocoding.R` includes a function to
check the number of incorrectly coded observations in a sample.

```{r geocoding, echo=TRUE, include=TRUE, results = FALSE, message=FALSE, warning=FALSE}

# load helper function 
source("code/helper_functions_geocoding.R")

# load MUAC data 
df <- read.csv("data/MUAC_all.csv")

# re-code county (adm1) names
df$adm1 <- dplyr::recode(df$adm1,
                                 "Tana river" = "Tana River",
                                 "West pokot" = "West Pokot",
                                 "Tharaka Nithi" = "Tharaka-Nithi")
  
# re-assign county (adm1) names
df <- df %>% mutate(
      adm1 = replace(adm1, adm3 == "Kamarandi", "Embu"), 
      adm1 = replace(adm1, adm3 == "Nanighi", "Garissa"),
      adm1 = replace(adm1, adm3 == "Kapenguria", "West Pokot"), 
      adm1 = replace(adm1, adm3 == "Riwo", "Baringo") 
    )
  
# re-code ward (adm3) names
df$adm3 <- dplyr::recode(df$adm3,
                                 "Komolion" = NA_character_,
                                 "Ribko" = "Ribkwo",
                                 "Orus" = NA_character_,
                                 "Saimo Soi" = "Saimo/Soi",
                                 "Akoret" = NA_character_,
                                 "Kanyuambora" = NA_character_,
                                 "Kamarandi" = NA_character_,
                                 "Abakayle" = "Abakaile",
                                 "Oldonyiro" = "Oldo/Nyiro",
                                 "Charri" = "Chari",
                                 "Bula Pesa" = "Bulla Pesa",
                                 "Loodokilani" = "Iloodokilani",
                                 "Ewuaso Kidong" = "Ewuaso Oonkidong'I",
                                 "Mbirikani" = "Mbirikani/Eselenkei",
                                 "Oldonyonyokie" = "Keekonyokie",
                                 "KANYANGI" = "Kanyangi",
                                 "Chengoni-Samburu" = "Chengoni/Samburu",
                                 "Waa-Ng'ombeni" = "Waa",
                                 "Kasemeni" = "Kinango", 
                                 "Mukogodo East" = "Mugogodo East",
                                 "Mukogodo West" = "Mugogodo West",
                                 "Baharini" = "Bahari",
                                 "NGUU/MASAMBA" = "Nguu/Masumba",
                                 "KASIKEU" = "Kasikeu",
                                 "MUKAA" = "Mukaa",
                                 "Simbir Fatuma" = "Shimbir Fatuma",
                                 "Banisa" = "Banissa",
                                 "Warankara" = "Waranqara",
                                 "Rhamu Dimtu" = "Rhamu-Dimtu",
                                 "Sagante" = "Sagante/Jaldesa",
                                 "Korr" = "Korr/Ngurunit",
                                 "Heilu Manyatta" = "Heillu/Manyatta",
                                 "Akithi" = "Akithii",
                                 "Amwitha" = "Amwathi",
                                 "Naroosura" = "Majimoto/Naroosura",
                                 "Thegu river" = "Thegu River",
                                 "Suguta Mar Mar" = "Suguta Marmar",
                                 "Wusi" = "Wusi/Kishamba",
                                 "Challa" = "Chala",
                                 "Wumingu" = "Wumingu/Kishushe",
                                 "Tunyai" = NA_character_,
                                 "Kamarandi" = NA_character_,
                                 "Kalemng'orok" = "Katilu",
                                 "Lokori/Kachodin" = "Lokiriama/Lorengippi",
                                 "Lorugum" = "Turkwel",
                                 "Nachukui" = "Kalokol",
                                 "Hadado" = "Hadado/Athibohol",
                                 "Lagbogol" = "Lagboghol South",
                                 "Barwaqo" = "Barwago",
                                 "Dasheq" = "Tarbaj",
                                 "Elnur" = "Elnur/Tula Tula",
                                 "Riwo" = "Lokori/Kochodin",
                                 "Masol" = "Masool")

# check if number of observations dropped = 0
df <- geocode_df(df)

```

```{r fig1, echo=TRUE, include=TRUE, results = FALSE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Map of 141 arid- and semi-arid study sites (Figure 1)", fig.height=6}

# load spatial data
# - unzip if shape files are not unpacked
if (!file.exists("shapefiles/gadm36_KEN_1.shp")){
  unzip('shapefiles/gadm36_KEN_1.zip')
}
# - load Kenya GADM ADM-1 shape file
Kenya1 <- sf::st_read("shapefiles/gadm36_KEN_1.shp")
# - unzip if shape files are not unpacked
if (!file.exists("shapefiles/KEN_adm3-ndma.shp")){
  unzip("shapefiles/KEN_adm3-ndma.zip")
}
# - load Kenya NDMA ADM-3 shape file
Kenya3_ndma <- sf::st_read("shapefiles/KEN_adm3-ndma.shp")

# add spatial information
Kenya3_ndma <- merge(Kenya3_ndma, df[!duplicated(df$adm3),], 
                     by.x = "NAME_3", by.y = "adm3")
Kenya1_ndma <- merge(Kenya1, df[!duplicated(df$adm1),], 
                     by.x = "NAME_1", by.y = "adm1")

# list arid vs semi-arid counties
arid <- Kenya1[Kenya1$NAME_1 %in% 
                 c("Baringo", "Garissa", "Isiolo", "Mandera", 
                   "Marsabit", "Samburu", "Tana River", 
                   "Turkana", "Wajir"),]

semi <- Kenya1[Kenya1$NAME_1 %in% 
                 c("Embu", "Kajiado", "Kilifi", "Kitui", "Kwale", 
                   "Laikipia", "Lamu", "Makueni", "Meru", "Narok", 
                   "Nyeri", "Taita Taveta", "West Pokot"),]

# list arid vs semi-arid wards
arid.adm3 <- Kenya3_ndma[Kenya3_ndma$NAME_1 %in% 
                           c("Baringo", "Garissa", "Isiolo", "Mandera", 
                             "Marsabit", "Samburu", "Tana River", 
                             "Turkana", "Wajir"),]

semi.adm3 <- Kenya3_ndma[Kenya3_ndma$NAME_1 %in% 
                 c("Embu", "Kajiado", "Kilifi", "Kitui", "Kwale", 
                   "Laikipia", "Lamu", "Makueni", "Meru", "Narok", 
                   "Nyeri", "Taita Taveta", "West Pokot"),]

# plot
fig1 = ggplot() + 
  geom_sf(data = Kenya1,  inherit.aes = FALSE, 
          fill = "white", color= "#5c666f", 
          linewidth = .25, alpha = .001) + 
  geom_sf(data = semi.adm3, inherit.aes = FALSE, 
          fill = "#ff6161", color= "#5c666f", 
          linewidth = .25, alpha= .65) + 
  geom_sf(data = arid.adm3, inherit.aes = FALSE, 
          fill = "#ffb3de", color= "#5c666f", 
          linewidth = .25, alpha= .65) + 
  geom_sf_text(data = arid, aes(label = NAME_1), 
               check_overlap = TRUE, size = 3, 
               face = "bold", color = "gray15") + 
  geom_sf_text(data = semi, aes(label = NAME_1), 
               check_overlap = TRUE, size = 3, 
               face = "bold", color = "gray15") + 
  theme_void() + 
  theme(legend.position = "bottom", 
        legend.box = "horizontal", 
        legend.box.spacing = element_blank(),
        legend.text = element_text(size=14)) + 
  coord_sf(xlim = c(32, 45), ylim = c(-6, 6)) +
  labs(x = "", y = "")

plot(fig1)

```

## 1.2 Ward resilience to acute malnutrition

We calculate ward resilience (*r_adm3*) on the basis of monthly MUAC
information of individual children in 22 counties between 2016 and 2020.

```{r resilience, message=FALSE, include=TRUE, echo =TRUE}

# define MUAC threshold in mm (Risk of acute malnutrition, WHO)
THRESHOLD = 135 

# calculate ward resilience 
resilience_adm3 <- df
resilience_adm3 <- resilience_adm3 %>%
  group_by(adm3, date) %>%
  dplyr::mutate(id_r = cur_group_id())
excluded_hh = 0
dfs <- list()
for(i in 1:length(unique(resilience_adm3$id_r))) {
  
  # subset by adm3
  dat <- subset(resilience_adm3,
                resilience_adm3$id_r==
                  unique(resilience_adm3$id_r)[i])
  
  # calculate MUAC distance to threshold
  # - difference calculation is entry-wise
  above <- subset(dat, dat$MUAC > THRESHOLD)
  below <- subset(dat, dat$MUAC <= THRESHOLD)
  above$diff = above$MUAC - THRESHOLD
  below$diff = THRESHOLD - below$MUAC
  # construct joint data frame
  if(nrow(below) > 0 && nrow(above) > 0) {
    dat <- rbind(above, below)
  } else {
    if(nrow(above) > 0)
      dat <- above
    if(nrow(below) > 0)
      dat <- below
  }
  
  # - integrate/sum over the differences above & below threshold
  # - relative weight not up/down-ward biased due to value range!
  if (nrow(above)>0){
    upper = sum(above$diff)
  }else{
    upper = 0
  }
  if (nrow(below)>0){
    lower = sum(below$diff)
  }else{
    lower = 0
  }
  
  r = 1/2*((upper-lower)/(upper+lower)+1)
  
  # construct data frame
  dfs[[i]] <- data.frame(
    "adm1" = unique(dat$adm1),
    "adm3" = unique(dat$adm3),
    "adm3ID" = unique(dat$adm3ID),
    "date" = unique(dat$date),
    "r_adm3" = r,
    "GAM_adm1" = unique(dat$GAM_adm1),
    "n_obs_adm3" = nrow(dat),
    "n_obs_adm3_above" = nrow(above),
    "n_obs_adm3_below" = nrow(below))
}

resilience_adm3 <- do.call(rbind, dfs)

df <- data.frame(resilience_adm3)
```

```{r fig2_right, include=TRUE, echo = TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Average resilience for the period between 2016 and 2020 with smaller values indicating lower resilience. Blue dots display the mean proportion of malnourished children relative to the population size per ward for the same period(Figure 2, right)", fig.width=10,fig.height=11}

# calculate malnutrition prevalence per adm3-month
df <- df %>%
  dplyr::group_by(adm3, date)%>%
  dplyr::mutate(adm3_prevalence = (n_obs_adm3_below/n_obs_adm3)*100)

# calculate mean resilience per adm3  
adm3 <- df %>%
  dplyr::group_by(adm3)%>%
  dplyr::summarize(resilience.mean = mean(r_adm3, na.rm = TRUE),
                   prev.mean = mean(adm3_prevalence, na.rm = TRUE))

df <- merge(df, adm3, by = "adm3")

# re-code to categorical
df$resilience.cat <- as.factor(
  ifelse(df$resilience.mean < 0.25, 
    1, 
    ifelse(df$resilience.mean>= 0.25 & df$resilience.mean < 0.5, 
      2, 
      ifelse(df$resilience.mean >= 0.5 & df$resilience.mean < 0.75, 
        3, 
        4))))

# code ward malnutrition prevalence as IPC categories
df$GAM_adm1 <- df$GAM_adm1*100
df$ipc <- as.factor(
  ifelse(df$prev.mean < 5, 
    1, # "Phase 1"
    ifelse(df$prev.mean >= 5 & df$prev.mean < 10, 
      2, # "Phase 2"
      ifelse(df$prev.mean >= 10 & df$prev.mean < 15, 
         3, # "Phase 3"
         ifelse(df$prev.mean >= 15 & df$prev.mean < 30, 
            4, # "Phase 4"
            5))))) # Phase 5

df <- data.frame(df)

# add spatial information
Kenya3_ndma <- merge(Kenya3_ndma, df[!duplicated(df$adm3),], 
                     by.x = "NAME_3", by.y = "adm3")
Kenya1_ndma <- merge(Kenya1, df[!duplicated(df$adm1),], 
                     by.x = "NAME_1", by.y = "adm1")

# plot right side of figure 2

fig2.2 = ggplot() + 
  geom_sf(data = Kenya1,  inherit.aes = FALSE, fill = "white", 
          color= "#5c666f", linewidth = .25, alpha = .001) + 
  geom_sf(data = Kenya3_ndma, 
          aes(fill = resilience.cat), alpha= .65) + 
  stat_sf_coordinates(data = Kenya3_ndma, 
                      aes(color = ipc), size = 2.5, alpha = .9) +
  scale_fill_manual(name = "Resilience average",
                    values = c("#78e027",
                               "#12cc6a","#00917c"),
                    labels = c("0.25 - 0.49", 
                               "0.50 - 0.74", "> 0.75")) +
  guides(colour=guide_legend(override.aes = list(size = 3), 
                             title.position = "top", nrow = 1), size = FALSE) + 
  scale_color_manual(labels = c("< 5.0", "5.0 - 9.9", 
                                "10.0 - 14.9", "15.0 - 29.9", 
                                "< 30.0"),
                     name = "Malnutrition prevalence (%)" ,
                     values = c("#6693f5", "#91AEEF",
                                "#a3a9fd","#6048c0", 
                                "#491e96")) +
  theme_void() + 
  theme(legend.position = "right", 
        legend.box = "vertical", 
        legend.box.spacing = element_blank(),
        legend.title = element_text(size=10, face = "bold"), 
        legend.text = element_text(size=10)) + 
  coord_sf(xlim = c(32, 45), ylim = c(-6, 6)) +
  labs(x = "", y = "")

plot(fig2.2)

```

## 1.3 External stressor intensity

We include information on climate, conflict and food price stressor
intensity at the county-month-level to identify typical ward-resilience
trajectories.

-   `kenya_climate_monthadm1.RData`: The climate data are based on the
    "MOD13A3 v006" NDVI data available
    [here](https://lpdaac.usgs.gov/products/mod13a3v006/). For each
    county-month in our sample, we calculate spatially averaged NDVI.

-   `kenya_markets_monthadm1.RData`: Information regarding market price
    developments for 1kg maize are provided by the
    [NDMA](https://www.ndma.go.ke) via expert interviews. We calculate
    monthly deviation rates from a three-year average and categorize
    each month as stressed or not, where stressor intensity is defined
    as the mean price increase above the long-term average.

-   `kenya_conflict_monthadm1.RData`: Lastly, to account for the
    intensity of conflict, we draw on the [Uppsala Conflict Data
    Program's Georeferenced Events Dataset](https://ucdp.uu.se) (see
    Sundberg & Melander, 2013).

```{r fig3_bottom, include=TRUE, echo = TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Kenya stressor intensity for the period between 2016 and 2020 (Figure 3, bottom)", fig.height=4}

# load data
load("data/kenya_markets_monthadm1.RData")
load("data/kenya_conflict_monthadm1.RData")
load("data/kenya_climate_monthadm1.RData")

# prepare for plotting
dfplot2 <- Reduce(function(x, y) merge(x, y,
                                       by=c("adm1", "year", "month"), 
                                       all=TRUE), 
                  list(conflict_month, climate_month, markets_month))
dfplot2$date <- as.Date(paste(dfplot2$year, dfplot2$month, "01", sep="-"))
dfplot2[is.na(dfplot2$stressor_conflict),]$stressor_conflict<-0
dfplot2[is.na(dfplot2$stressor_conflict_intensity),]$stressor_conflict_intensity<-0

dfplot2 <- dfplot2 %>%
  group_by(adm1, date) %>%
  mutate(stressors = sum(stressor_conflict_intensity, 
                         stressor_markets_intensity, 
                         stressor_climate_intensity))
dfplot2 <- subset(dfplot2, !is.na(dfplot2$stressors))

dfplot3 <- aggregate(cbind(stressor_climate_intensity, 
                           stressor_conflict_intensity, 
                           stressor_markets_intensity, 
                           stressors) ~ date, data=dfplot2, FUN=mean)

dfplot3 <- dfplot3[c("date", "stressor_climate_intensity", 
                     "stressor_conflict_intensity", 
                     "stressor_markets_intensity", 
                     "stressors")]
dfplot3 <- reshape2::melt(dfplot3, id.vars=c("date"), 
                          measure.vars=c("stressor_climate_intensity", 
                                         "stressor_conflict_intensity", 
                                         "stressor_markets_intensity"))

# plot bottom of figure 3

fig3.1 <- ggplot2::ggplot(dfplot2, aes(x=date, fill="adm")) +
  geom_area(data=dfplot3, aes(y=value, fill=variable), position = 'stack', 
            color='black', size=0.2, alpha=0.8) +
 scale_x_date(breaks=c(seq.Date(min(dfplot3$date), 
                                max(dfplot3$date), 
                                by="quarter"), as.Date("2020-03-01")),
               labels=format(c(seq.Date(min(dfplot3$date), 
                                        max(dfplot3$date), 
                                        by="quarter"), 
                               as.Date("2020-03-01")), "%b %Y"),
               expand = c(0, 0.02)) +
   scale_y_continuous(breaks = c(3*0.05,3*0.10,3*0.15,3*0.20,
                                 3*0.25,3*0.3,3*0.5,3*0.75,3), 
                      limits = c(0, 3), 
                      labels=c("5%", "10%", "15%", "20%", "25%", 
                               "30%", "50%", "75%", "100%"), 
                      expand = c(0, 0)) +
   scale_fill_manual(name="stressor",
                     breaks=c("stressor_climate_intensity", 
                              "stressor_conflict_intensity", 
                              "stressor_markets_intensity"),
                     labels=c("climate", "conflict", "markets"),
                     values=c("#fdebf3" ,"#ccff66", "#513223")) +
   theme(panel.grid.major = element_blank(), 
         panel.grid.minor = element_blank(), 
         panel.background = element_blank(), 
         axis.line =  element_line(colour = "black"),
         legend.position="right", legend.box = "vertical", 
         legend.title = element_text(size=10), 
         legend.text = element_text(size=10)) +
   labs(y="Stressor Intensity", x="Month")

plot(fig3.1)

```

# 2. Latent class mixed model (LCMM)

We fit a Latent Class Mixed Model (LCMM) to identify latent patterns in
the observed ward resilience and use the fitted model to classify wards
into groups with similar resilience trajectories.

We include below the code we used to identify the best model based on
the smallest BIC, depending on alternative link functions and 1-10
classes. Note that as a function of the slightly different, anonymised
input data, it's impossible to directly replicate the merger for the
ward-level resilience trajectories and figures as reported in the
article. To save computing time, we provide this information, i.e. the
resilience trajectory class of each household (excluding ward-names) in
a separate file called `household_all.csv`.

```{r lcmm, eval=FALSE, echo=TRUE, message=FALSE, include=TRUE}

# 1. explore alternative link functions

set.seed(14)

# beta
b1 <- lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
           + stressor_conflict_intensity + stressor_markets_intensity
           ,random=~time, subject = 'adm3ID', link = "beta"
           ,ng = 1, data = df)
summary(b1)

# splines
s1 <- lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
           + stressor_conflict_intensity + stressor_markets_intensity
           ,random=~time, subject = 'adm3ID', link = "splines"
           ,ng = 1, data = df)
summary(s1)

# 2. explore alternative numbers of classes

# initial model with ng=1 for the random initial values
s1 <- lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
           + stressor_conflict_intensity + stressor_markets_intensity
           ,random=~time, subject = 'adm3ID', link = "splines"
           ,ng = 1, data = df)
summary(s1)

# model with ng=2 
s2 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
                    ,random =~time 
                    ,mixture=~time
                    ,subject = 'adm3ID', link = "splines"
                    ,ng = 2, data = df
                    ,B = s1
                    ,maxiter=100)
summary(s2)

# model with ng=3 
s3 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 3, data = df 
            ,B = s1
            ,maxiter=250)
summary(s3)

# model with ng=4
s4 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 4, data = df 
            ,B = s1
            ,maxiter=250)
summary(s4)

# model with ng=5
s5 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 5, data = df 
            ,B = s1
            ,maxiter=250)
summary(s5)

# model with ng=6
s6 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 6, data = df 
            ,B = s1
            ,maxiter=250)
summary(s6)

# model with ng=7
s7 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 7, data = df 
            ,B = s1
            ,maxiter=250)
summary(s7)

# model with ng=8
s8 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 8, data = df 
            ,B = s1
            ,maxiter=250)
summary(s8)

# model with ng=9
s9 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
            + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 9, data = df 
            ,B = s1
            ,maxiter=250)
summary(s9)

# model with ng=10
s10 <-  lcmm(fixed=r_adm3~ time + stressor_climate_intensity 
             + stressor_conflict_intensity + stressor_markets_intensity
            ,random =~time 
            ,mixture=~time
            ,subject = 'adm3ID', link = "splines"
            ,ng = 10, data = df 
            ,B = s1
            ,maxiter=250)
summary(s10)

# compare model fit based on smallest BIC 
fit <- summarytable(s1, s2, s3, s4, s5, s6, s7, s8, s9, s10)

```

Next, we use the model to categorize wards into one of the four
classes in terms of their resilience trajectory.

```{r lcmm_fit, eval=FALSE, message=FALSE, include=TRUE, echo=TRUE}

# residuals 
plot(s4)

# postprob plot
plot(s4,which="postprob") 

# post prob's for 4 classes
postprob(s4)
round(summary(as.numeric(s4$pprob[s4$pprob[,"class"]==1,"prob1"])),2)
round(summary(as.numeric(s4$pprob[s4$pprob[,"class"]==2,"prob2"])),2)
round(summary(as.numeric(s4$pprob[s4$pprob[,"class"]==3,"prob3"])),2)
round(summary(as.numeric(s4$pprob[s4$pprob[,"class"]==4,"prob4"])),2)

# assign classes to observations
df$adm3ID <- as.character(df$adm3ID)
people4 <- as.data.frame(s4$pprob[,1:2])
people4$adm3ID <- as.character(people4$adm3ID)
df <- merge(df, people4, by = "adm3ID", all.x = TRUE)
```

```{r fig2_left, include=TRUE, echo = TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Ward resilience trajectory assigned using the latent-class mixed model. Blue dots display the mean proportion of malnourished children relative to the population size per ward for the same period (Figure 2, left)", fig.width=10,fig.height=11}

# load household data 
df <- read.csv("data/household_all.csv", stringsAsFactors = FALSE)

# add spatial information
Kenya3_ndma <- merge(Kenya3_ndma, df[!duplicated(df$adm3ID),], 
                     by.x = "adm3ID.y", by.y = "adm3ID")

# plot left side of figure 2
legend_size <- c(1,2,3,4,5)

fig2.1 = ggplot() + 
  geom_sf(data = Kenya1,  inherit.aes = FALSE, fill = "white", 
          color= "#5c666f", linewidth = .25, alpha = .001) + 
  geom_sf(data = Kenya3_ndma, aes(fill = class), alpha= .65) + 
  stat_sf_coordinates(data = Kenya3_ndma, aes(color = ipc), 
                      size = 2.5, alpha = .9) +
  scale_fill_manual(name = "Resilience trajectory",
                    values = c("#ffb3de" ,"#ff6699", "#ff6161","#de2414")) +
  guides(colour=guide_legend(override.aes = list(size = 5), 
                             title.position = "top", nrow = 1), size = FALSE) + 
  scale_color_manual(labels = c("< 5.0", "5.0 - 9.9", "10.0 - 14.9", 
                                "15.0 - 29.9", "< 30.0"),
                     name = "Malnutrition prevalence (%)" ,
                     values = c("#6693f5", "#91AEEF","#a3a9fd",
                                "#6048c0", "#491e96")) +
  theme_void() + 
  theme(legend.position = "right", 
        legend.box = "vertical", 
        legend.box.spacing = element_blank(),
        legend.title = element_text(size=10, face = "bold"), 
        legend.text = element_text(size=10)) + 
  coord_sf(xlim = c(32, 45), ylim = c(-6, 6)) +
  labs(x = "", y = "")

plot(fig2.1)

```

```{r fig3_top, include=TRUE, echo = TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Observed trajectories of ward resilience to acute malnutrition at monthly intervals (Figure 3, top)", fig.height=3}

fig3_top <- ggplot(df, aes(date, r_adm3, group=adm3ID, color=class)) + 
          geom_smooth(data=df, 
                      aes(y=r_adm3, x=date, colour="gray93"), 
                      method="auto", size=0.3, se=FALSE, alpha=0.1) +  
          geom_smooth(data=df[grepl("chronic", df$class),], 
                      aes(y=r_adm3, x=date, group=class, 
                          colour=REDS[4]), method="auto",se=T, 
                      size=1.2, alpha=0.1) +
          geom_smooth(data=df[grepl("decrease", df$class),], 
                      aes(y=r_adm3, x=date, group=class, 
                          colour=REDS[2]), method="auto",se=T, 
                      size=1.2, alpha=0.1) +
          geom_smooth(data=df[grepl("increase", df$class),], 
                      aes(y=r_adm3, x=date, group=class, 
                          colour=REDS[3]), method="auto", se=T, 
                      size=1.2, alpha=0.1) +
          geom_smooth(data=df[grepl("robust", df$class),], 
                      aes(y=r_adm3, x=date, group=class, 
                          colour=REDS[1]), method="auto", se=T, 
                      size=1.2, alpha=0.1) +
          scale_colour_manual(name = 'Resilience Trajectory',
                      values = c('gray93', REDS[1], REDS[2], 
                                 REDS[3], REDS[4]),
                      breaks = c('gray93', REDS[1], REDS[2], 
                                 REDS[3], REDS[4]),
                      labels = c('Ward', 'Chronic', 'Decrease',
                                 'Increase', 'Robust')) +
        scale_y_continuous(limits = c(0, 1.0), 
                           breaks=c(0.00, 0.25, 0.5, 0.75, 1.0), 
                           labels=c("0.00", "0.25", "0.5", "0.75", "1.00")) +
          theme(panel.grid.major = element_blank(), 
                panel.grid.minor = element_blank(), 
                panel.background = element_blank(), 
                axis.line = element_line(colour = "black"),
                legend.position="bottom", 
                legend.box = "horizontal", 
                legend.title = element_text(size=10, face="bold"),
                legend.text = element_text(size=10), 
                plot.title = element_text(hjust = 0.5, face="bold", size=10),
                axis.title.y = element_text(size=10,face="bold",  
                                            margin=margin(r=15)),
                axis.title.x = element_text(size=10, face="bold", 
                                            margin=margin(t=15))) + 
          labs(y="Resilience", x="Year")

plot(fig3_top)

```

```{r fig4, include=TRUE, echo = FALSE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Resilience trajectories per assigned latent class for Kenyan wards (Figure 4)"}

df$date <- as.Date(df$date)
df$class <- recode_factor(df$class, `chronic` = 1,
                           `decrease` = 4, `robust` = 2, `increase` = 3)

# chronic 
fig4.1 <- ggplot() + 
  geom_smooth(data=df[grepl("3", df$class),], aes(y=r_adm3, x=date, group=adm3ID, colour=REDS[1]), 
              method="auto", se=T, size=.3, alpha =.15, show.legend=FALSE) +
  scale_colour_manual(name = 'Resilience Trajectory',
                      values = c('gray93', REDS[1]),
                      breaks = c('gray93', REDS[1]),
                      labels = c('Ward', "Chronic")) +
  scale_x_date(limits = c(as.Date("2016-06-01"),as.Date("2020-04-01"))) + 
  scale_y_continuous(limits = c(0, 1.0), breaks=c(0.00, 0.25, 0.5, 0.75, 1.0), 
                     labels=c("0.00", "0.25", "0.5", "0.75", "1.00")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="bottom", legend.box = "horizontal", legend.title = element_text(size=.38, face="bold"),
        legend.text = element_text(size=18), 
        plot.title = element_text(hjust = 0.5, size=18, face="bold", colour = REDS[1]),
        axis.title.y = element_text(size=16,face="bold",  margin=margin(r=15)),
        axis.title.x = element_text(size=16, face="bold", margin=margin(t=15))) + 
  labs(y="Resilience", x="", title = "Class A: Chronic")

# robust
fig4.2 <- ggplot() + 
  geom_smooth(data=df[grepl("2", df$class),], aes(y=r_adm3, x=date, group=adm3ID, colour=REDS[4]), 
              method="auto", se=T, size=.3, alpha =.15, show.legend=FALSE) +
  scale_colour_manual(name = 'Resilience Trajectory',
                      values = c('gray93', REDS[4]),
                      breaks = c('gray93', REDS[4]),
                      labels = c('Ward', "Robust")) +
  scale_x_date(limits = c(as.Date("2016-06-01"),as.Date("2020-04-01"))) + 
  scale_y_continuous(limits = c(0, 1.0), breaks=c(0.00, 0.25, 0.5, 0.75, 1.0), 
                     labels=c("0.00", "0.25", "0.5", "0.75", "1.00")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="bottom", legend.box = "horizontal", legend.title = element_text(size=18, face="bold"),
        legend.text = element_text(size=18), 
        plot.title = element_text(hjust = 0.5, size=18, face="bold", colour = REDS[4]),
        axis.title.y = element_text(size=16,face="bold",  margin=margin(r=15)),
        axis.title.x = element_text(size=16, face="bold", margin=margin(t=15))) + 
  labs(y="Resilience", x="Year", title = "Class C: Robust")

# increase
fig4.3 <- ggplot() + 
  geom_smooth(data=df[grepl("1", df$class),], aes(y=r_adm3, x=date, group=adm3ID, colour=REDS[3]), 
              method="auto", se=T, size=.3, alpha =.15, show.legend=FALSE) +
  scale_colour_manual(name = 'Resilience Trajectory',
                      values = c('gray93', REDS[3]),
                      breaks = c('gray93', REDS[3]),
                      labels = c('Ward', "Increase")) +
  scale_x_date(limits = c(as.Date("2016-06-01"),as.Date("2020-04-01"))) + 
  scale_y_continuous(limits = c(0, 1.0), breaks=c(0.00, 0.25, 0.5, 0.75, 1.0), 
                     labels=c("0.00", "0.25", "0.5", "0.75", "1.00")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="bottom", legend.box = "horizontal", legend.title = element_text(size=18, face="bold"),
        legend.text = element_text(size=18), 
        plot.title = element_text(hjust = 0.5, size=18, face="bold", colour = REDS[3]),
        axis.title.y = element_text(size=16,face="bold",  margin=margin(r=15)),
        axis.title.x = element_text(size=16, face="bold", margin=margin(t=15))) + 
  labs(y="Resilience", x="Year", title = "Class B: Increasing")


# decrease
fig4.4 <- ggplot() + 
  geom_smooth(data=df[grepl("4", df$class),], aes(y=r_adm3, x=date, group=adm3ID, colour=REDS[2]), 
              method="auto", se=T, size=.3, alpha =.15, show.legend=FALSE) +
  scale_colour_manual(name = 'Resilience Trajectory',
                      values = c('gray93', REDS[2]),
                      breaks = c('gray93', REDS[2]),
                      labels = c('Ward', "Decrease")) +
  scale_x_date(limits = c(as.Date("2016-06-01"),as.Date("2020-04-01"))) + 
  scale_y_continuous(limits = c(0, 1.0), breaks=c(0.00, 0.25, 0.5, 0.75, 1.0), 
                     labels=c("0.00", "0.25", "0.5", "0.75", "1.00")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="bottom", legend.box = "horizontal", legend.title = element_text(size=18, face="bold"),
        legend.text = element_text(size=18), 
        plot.title = element_text(hjust = 0.5, size=18, face="bold",colour = REDS[2]),
        axis.title.y = element_text(size=16,face="bold",  margin=margin(r=15)),
        axis.title.x = element_text(size=16, face="bold", margin=margin(t=15))) + 
  labs(y="Resilience", x="Year", title = "Class D: Decreasing")

# plot 
fig4 <- plot_grid(fig4.1,
          fig4.2,
          fig4.3,
          fig4.4, 
          ncol = 2, nrow = 2)

plot(fig4)

```

# 3. Cox regression

Second, we use cox regression analysis to explore household risk
depending on ward-resilience trajectory, while controlling for a minimal
set of control variables.

The raw data used is `household_all.csv`.

To perform the cox regression analysis, we provide the following
variables

-   *AMN_dummy*: binary variable indicating that a household is
    malnourished, i.e. if min 1 observation is MUAC \< 125mm

-   *class*: assigned ward-resilience trajectory (increase, decrease,
    chronic, robust)

-   *time*: number of survey months

-   *switch_below*: binary variable indicating that a household becomes
    malnourished at time t

-   *switch_recover*: binary variable indicating that a household
    recovers from malnutrition at time t

-   *status*: categorical variable indicating household switches from
    normal nutrition to undernourished and vice versa

-   *tevent:* time in months until switch in status

Note that the above calculations are rather data-intensive and were
processed separately. In addition, we include the following fixed
household characteristics as background variables:

-   *gender household head* (female, male)

-   *education household head* (true, false)

-   *livelihood* (pastoralist, other)

-   *water source* (safe, unsafe)

`rules_coding_agg.txt` provides a summary of the coding and aggregation
rules.

```{r cox.uni, eval=TRUE, message=FALSE, warning=FALSE, include=TRUE, echo=TRUE}

# load household data 
df <- read.csv("data/household_all.csv", stringsAsFactors = TRUE)

# use chronic as a reference category 
df$class = relevel(df$class, ref = "chronic")

# recode status 
df$status2 <- as.integer(df$status)
summary(df$status2)

# null model with time switch below nutrition threshold as DV
fit2 <- survfit(Surv(tevent, status2) ~ class, data=df)

break_values <- c(0, 0.25, 0.5, 0.75, 1)
summary(fit2)

# time-to-event-plot
cox.uni <- ggsurvplot(fit2, data = df, 
                        risk.table = FALSE, font.y = c(18, "bold", "black"), 
                        xlim = c(1,12),  xlab = "Time in Months", break.time.by = 1,
                        ylim = c(0,1),  ylab = "Survival Probability", 
                        break.by = break_values,
                        censor.shape="|", censor.size = 1,
                        palette =c(REDS[1], REDS[2], REDS[3], REDS[4]),
                        ggtheme= theme(panel.grid.major = element_blank(), 
                                       panel.grid.minor = element_blank(), 
                                       panel.background = element_blank(), 
                                       axis.line = element_line(colour = "black")))

cox.uni

# univariate regression results
res.cox <- coxph(Surv(tevent, status2) ~ class, data = df)
res.cox
summary(res.cox)

cox.uni.plot <- ggforest(res.cox, data = df)
cox.uni.plot

```

```{r fig5, include=TRUE, echo = TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.cap="Multivariate Cox regression results for household risk (Figure 5)."}

# rename 
df <- df %>% 
  dplyr::rename("resilience trajectory" = "class",
                "water source" = "water_source")

# multivariate regression
res.cox2 <- coxph(Surv(tevent, status2) ~ `resilience trajectory` + livelihood 
                  +  gender + education + `water source` , data = df)
res.cox2
summary(res.cox2)


fig5 <- ggforest(res.cox2, data = df, fontsize = 1) 
plot(fig5)

```
